## Figure 3: Comparison between Exporting Respondents and All PROCOMER
## Exporters
    
rm(list=ls(all=TRUE))

## library -----------------------------------------------------------
library(ggplot2)
library(gridExtra)
library(dplyr)


## setup -------------------------------------------------------------
## setting the working directory as the current directory
setwd(getwd())

## check/create the folder to put outputs
if (file.exists(file.path(getwd(), "figs"))){
    FIG_DIR <- file.path(getwd(), "figs")
} else {
    dir.create(file.path(file.path(getwd(), "figs")))
    FIG_DIR <- file.path(getwd(), "figs")
}
    
source("util.R")

## -------------------------------------------------------------------
## firms responded twice
resptwice <- c("R_3lKP5vVopq27Q5T", "R_ewwjPKYf1mQtVkx",
               "R_8wVpt534HFyvbG5","R_7QwtntdSZ7ONCaV",
               "R_787WpeOkIatHWkZ", "R_8umeWuf3qISiJcV",
               "R_85MeGXniJgeyA6h", "R_0Sy2CDRAw2ddHql",
               "R_5pDS87UuBf5ekU5","R_9YwTFMe01hCQIDP",
               "R_38WL2rp1ocK9c1v","R_0qEMbRZcn0W3gMd",
               "R_3UdWL2nJMFa61hP","R_ctKLc9LF3hANh4x")

## read survey data
survey <- read.csv("crsurvey_full.csv")
rm <- which(survey$V1 %in% resptwice)
survey <- survey[-rm,]
id <- data.frame(id=survey$V1, proccode=survey$proccode)


## reading procomer data
options(scipen = 999)
procomer <- read.csv(file.path("procomerALL.csv"))
procomer$hs10 <- gsub("^p", "", procomer$hs10)
procomer$hs10 <- gsub("e\\+0800", "e+08", procomer$hs10)
procomer$hs10 <- gsub("e\\+0900", "e+09", procomer$hs10)

procomer$hs10 <- as.numeric(procomer$hs10)
which(procomer$hs10 == "Inf")


## fixing 9 digit cases
hs10 <- as.character(procomer$hs10)
a <- nchar(hs10)
idx <- which(a==9)
hs10[idx] <- paste("0", hs10[idx], sep="")
procomer$hs10 <- hs10

## checking the number of unique HS 10 digit products
length(unique(procomer$hs10))

## checking respondents vs procomer firms ----------------------------a
ids <- unique(as.character(id$proccode))
## find procomer firms
ids <- ids[which(ids!="")]
ids[which(ids == "E2616; E4532")] <- "E2616"

## now getting ids except for our respondents from procomer
sub <- as.data.frame(filter(procomer, year==2012))

f1 <- ids[ids %in% sub$procomerID]
f2 <- as.character(unique(filter(sub, !(procomerID %in% f1))$procomerID))

sub$resp <- ifelse(sub$procomerID %in% f1, 1, 0)
dfA <- as.data.frame(filter(sub, resp==1))
dfB <- as.data.frame(filter(sub, resp==0))


df <- as.data.frame(summarize(group_by(sub, procomerID),
                                  ndest = length(unique(destination)),
                                  nprod = length(unique(hs10)),
                                  mvalue = log(median(value, na.rm=T))))
df$resp <- ifelse(df$procomerID %in% f1, 1, 0)



summary(lm(mvalue~as.factor(resp), data=df))
## number of products
summary(lm(nprod ~ as.factor(resp), data=df))


## median value of trade --------------------------------------------

mod <- lm(mvalue~as.factor(resp), data=df)
means <- as.numeric(mod$coef)


pdf(file.path(FIG_DIR, "figure3a.pdf"),
    width=12,height=10) # for paper

par(cex.main=2.3, cex.lab=2, cex.axis=1.5,
    mar=c(6,10,7,10))

plot( density(df[which(df$resp==1),"mvalue"]),
     col="grey20", xlim=c(0,17), ylim=c(0,.3),
     main="Distribution of Product Level Exports",
     lwd=3, ylab="Density", xlab="")

lines(density(df[which(df$resp==0),"mvalue"]),
      col="grey50", lwd=2, lty=2)

mtext(side=1, "Median Value of Product Trade (logged)", line=3.5, cex=2.2)
legend("topright", lty=c(1,2), lwd=c(2,2),
       c("Respondents", "All Exporters"),
       bty="n", cex=2)

abline(v=(means[1]+means[2]), lty=1, col="grey20", lwd=1.5)
abline(v=means[1], lty=2, col="grey50", lwd=1.5)
text(4.5, 0.25, "Difference in Means: 0.19 \n(p=0.161)", cex=2)

dev.off()





## top ten destinations

top10a <- sort(table(dfA$destination)/nrow(dfA), decreasing=T)[1:10]
names(top10a)
namesa <- c("USA", "Nicaragua", "Panama", "Guatemala", "Honduras", "El Salvador",
            "Dominican\nRepublic", "Mexico", "Netherlands", "Puerto Rico")

top10a <- as.numeric(rev(top10a))
namesa <- rev(namesa)


top10b <- sort(table(dfB$destination)/nrow(dfB), decreasing=T)[1:10]
names(top10b)
namesb <- c("Nicaragua", "USA", "Panama", "Guatemala", "El Salvador",
            "Honduras", "Mexico", "Dominican\nRepublic", "Netherlands", "Colombia")
top10b <- as.numeric(rev(top10b))
namesb <- rev(namesb)


pdf(file.path(FIG_DIR, "figure3b.pdf"),
    width=12,height=10) # for paper

par(cex.main=2.3, cex.lab=2, cex.axis=1.5,
    mar=c(6,10,7,10))

## separation between bars
sepr <- 0.2
nr <- 10
## create plot
plot(c(-1,1), c(1,nr), type="n",
     ylim=c(1-(1-sepr)/2-sepr,
            nr+(1-sepr)/2+sepr),
     xlim=c(-.3,.3),
     main="Proportion of Product-Export Destinations",
     ylab="", yaxt="n", xaxt="n",
     xlab="", )


for(i in 1:nr){
    abline(h=i, lty=2, col="grey80")

}
## plot bars
for(i in 1:nr) {
    ## respondents
    lo <- 0
    hi <- -as.numeric(top10a[i])
   
    polygon(x=c(lo, lo, hi, hi),
            y=c(i-(1-sepr)/2, i+(1-sepr)/2,
                i+(1-sepr)/2, i-(1-sepr)/2),
            col="grey30", border=NA)

    ## all else
    lo1 <- 0
    hi1 <- as.numeric(top10b[i])
    
    polygon(x=c(lo1, lo1, hi1, hi1),
            y=c(i-(1-sepr)/2, i+(1-sepr)/2,
                i+(1-sepr)/2, i-(1-sepr)/2),
            col="grey50", border=NA)
        
}

## create y-axis
axis(3, at=round(seq(from=-.3, to=.3, by=0.1),2), labels=as.character(round(abs(seq(from=-.3, to=.3, by=0.1)),2)))
axis(2, at=1:nr, las=2, xpd=NA, labels=namesa) 
axis(4, at=1:nr, las=2, xpd=NA, labels=namesb) 


## add center line
abline(v=0, lwd=2, col="white")
abline(v=-0.3, lty=2, col="grey80")
abline(v=-0.2, lty=2, col="grey80")
abline(v=-0.1, lty=2, col="grey80")
abline(v=0.1, lty=2, col="grey80")
abline(v=0.2, lty=2, col="grey80")
abline(v=0.3, lty=2, col="grey80")

axis(1, at=c(-0.2,0.2), labels=c("",""))

mtext(side=1, line=2.5, at="-0.2", "Respondents", cex=2)
mtext(side=1, line=2.5, at="0.2", "All Exporters", cex=2)

dev.off()





